#### MASTER FILE
## From Unemployment to Self-employment: An Evaluation of Self-employment Assistance Programs
## by Alexandre Gaillard & Sumudu Kankanamge 
## June 2023.



## This file produces all the results of the paper, including those in the online Appendix.

## Library
library(plyr)
library(data.table)
library(reldist)
library(fixest)
library(tidyverse)
library(cdlTools)
library(stargazer)
library(lubridate)
library(ggpubr)
library(ggplot2)
library(foreign)
library(nnet)
library(stargazer)
library(gtsummary)
library(mlogit)
library(VGAM)
library(haven)
library(questionr)
library(AER)
library(broom)
library(survey)

## Sample selection: survey years kept when computing the transition flows.
years_selected_transition <- 1994:2015

#### Table Bimonthly Gross Flow Rates (CPS)  #### 
## Load the CPS extract. The code below expects this .RData file to define
## `final_data`.
## NOTE(review): absolute, machine-specific path -- adjust before running
## elsewhere.
load(file = "/Users/alexandregaillard/Dropbox (Personal)/JOLE_revision/final_files_JOLE/Data/CPS/CPS_data_couples_quarter_v2023_ELMM.RData")

## Keep only the columns used in this section.
b <- final_data[, c("month_id", "id_month", "couple", "unique_id", "YEAR",
                    "AGE", "WTFINL", "jobstatus", "jobstatus4")]

## Sort key: id_month is shifted back by one for month_id 2 and by two for
## month_id 3, then combined with month_id and couple.
## These two shifts used to be written as two successive assignments to
## super_id2, so the second overwrote the first and the month_id == "3" shift
## was never applied. Both shifts are now applied in a single expression.
## NOTE(review): presumably the shift aligns the id_month component across the
## observations of one record -- confirm.
id_month_shift <- ifelse(b$month_id == "3", 2, ifelse(b$month_id == "2", 1, 0))
b$super_id2 <- (as.numeric(b$id_month) - id_month_shift) * 10000 +
  as.numeric(b$month_id) + as.numeric(b$couple) * 1000000

## Sort by unique_id, using super_id2 to break ties. order() leaves ties in
## their existing order, so this gives the same row order as sorting by
## super_id2 first and by unique_id afterwards.
b <- b[order(b$unique_id, b$super_id2), ]
table(b$month_id)

## Restrict the sample to the selected survey years.
in_sample_years <- b$YEAR %in% years_selected_transition
b <- b[which(in_sample_years), ]

## Distinct id_month values, in the order table() lists them.
id_month_all <- names(table(b$id_month))
n_id_month   <- length(id_month_all)

## Drop month_id == 2 rows falling in the first three id_month values, and
## month_id == 1 rows falling in the last three id_month values.
## NOTE(review): a previous version of this comment said "two" months at each
## end, while the code removes three -- confirm which one is intended.
first_id_months   <- id_month_all[1:3]
in_head_as_second <- b$id_month %in% first_id_months & b$month_id == 2
b <- b[which(!in_head_as_second), ]

last_id_months   <- id_month_all[(n_id_month - 2):n_id_month]
in_tail_as_first <- b$id_month %in% last_id_months & b$month_id == 1
b <- b[which(!in_tail_as_first), ]
table(b$YEAR)

# remove_ID = b$unique_id[which(!(b$AGE %in% select_age_reg) & b$month_id == 1)]
# b = b[which(!(b$unique_id %in% remove_ID)),]
# remove_ID = b$unique_id[which(!(b$AGE %in% select_age_reg) & b$month_id == 2)]
# b = b[which(!(b$unique_id %in% remove_ID)),]
# table(b$AGE)
# head(b)
# table(b$month_id)

## Guard against wrongly matched records: a unique_id is kept only if the ages
## reported across its rows differ by at most one year.
age_spread <- summarize(group_by(b, unique_id), age_range = max(AGE) - min(AGE))
consistent_unique_ids <- age_spread$unique_id[which(age_spread$age_range <= 1)]

## Every other unique_id (including those whose age range is NA) is treated as
## inconsistent and all of its rows are removed from b.
inconsistent_unique_ids <- setdiff(unique(b$unique_id), consistent_unique_ids)
is_inconsistent <- b$unique_id %in% inconsistent_unique_ids
b <- b[which(!is_inconsistent), ]
table(b$month_id)

# Compute the transition between occupations.
# Consecutive rows of b are paired: odd rows (1, 3, 5, ...) supply the origin
# state and weight, the following even rows (2, 4, 6, ...) supply the
# destination state and weight.
# The earlier version built the pairs by slicing 1:(n - 2) and 2:(n - 1) and
# then keeping every second element, which silently dropped the final pair
# whenever nrow(b) was even. Explicit row indices keep every complete pair.
# Remark: with k observations per record, step by k instead of 2.
n_rows     <- nrow(b)
first_row  <- 2L * seq_len(n_rows %/% 2L) - 1L   # 1, 3, 5, ...
second_row <- first_row + 1L                     # 2, 4, 6, ...

# The pairing is only meaningful if both rows of a pair belong to the same
# unique_id. Warn (without stopping) when that does not hold.
n_mismatched <- sum(b$unique_id[first_row] != b$unique_id[second_row], na.rm = TRUE)
if (n_rows %% 2L != 0L || n_mismatched > 0L) {
  warning("Rows of b do not form clean consecutive pairs per unique_id (",
          n_mismatched, " mismatched pairs, ", n_rows, " rows); ",
          "transition flows may be misaligned.", call. = FALSE)
}

corresp_id        <- b$unique_id[first_row]
lfsta_emp         <- b$jobstatus4[first_row]    ## variable for transition flows (origin).
lfsta_emp1        <- b$jobstatus4[second_row]   ## same variable, destination.
lfsta_emp_select  <- b$jobstatus[first_row]     ## variable for transition flows (origin).
lfsta_emp_select1 <- b$jobstatus[second_row]    ## same variable, destination.
weight            <- b$WTFINL[first_row]        ## weights (origin row).
weight1           <- b$WTFINL[second_row]       ## weights (destination row).

# cbind() builds a single-type matrix, and the two unnamed weight expressions
# become columns V4 and V5 once converted to a data frame.
data_flow         <- as.data.frame(cbind(corresp_id, lfsta_emp, lfsta_emp1, as.numeric(weight), as.numeric(weight1), lfsta_emp_select, lfsta_emp_select1))

# Sanity checks: print three observation counts for visual comparison.
sum(table(data_flow$lfsta_emp, data_flow$lfsta_emp_select))
sum(table(data_flow$lfsta_emp))
sum(table(data_flow$lfsta_emp_select))

# Weighted flow table: rows are lfsta_emp_select (origin), columns are
# lfsta_emp_select1 (destination), cells are sums of the V4 weight column.
trans <- as.data.frame.matrix(
  xtabs(as.numeric(data_flow$V4) ~ data_flow$lfsta_emp_select + data_flow$lfsta_emp_select1)
)
trans

# Drop the first row and first column, then turn each remaining row into
# shares that sum to one.
# NOTE(review): the first category is presumably one that should be excluded
# from the flow rates -- confirm against the jobstatus coding.
# drop = FALSE keeps a data frame even if only one column remains.
TRANS1  <- trans[-1, -1, drop = FALSE]
rowsums <- rowSums(TRANS1)
# Dividing a data frame by a vector of length nrow() divides row i by
# rowsums[i]. Unlike a 1:length() loop, this is also safe for an empty table.
TRANS1  <- TRANS1 / rowsums
TRANS1

